********************************************************************************************************************************
***   Replication file for:                                                                                                  ***
***   Rafiee Rad, S., Braun, S. T., and Roy, O. (2023). Anchoring as a Structural Bias of Deliberation.					     ***
***   							                                                                                             ***
***   SCRIPT: 	0_data_preparation.do																						 ***	
***   PURPOSE: 	main script for data preparation																			 ***
********************************************************************************************************************************

* Preamble (unnecessary when executing run.do)
run "$anchoring/scripts/programs/_config.do"

***************
* Code begins
***************

******************
*** Read in data

* Largest group size to process; files exist for sizes 3, 7, 11, ... up to this value
local maxsize = 99
* Index of the last agent column (agents are numbered from 0); used by the rename loop further down
local maxizeminusone = `maxsize' - 1

** Simulation characteristics: Group size, randomized reliabilities, non-strict voting, number of simulations
** Each raw CSV is tagged with its group size and a within-file row counter and stored as .dta.

forvalues i = 3(4)`maxsize' {
	import delimited "$anchoring/data/baseline/anchoring`i'.csv", clear
	gen int groupsize = `i'
	*gen str10 reliability = "randomized"
	*gen str10 voting = "nonstrict"
	* Stored as long: an int tops out at 32,740, so a longer file would get missing
	* values here and the later sort on (groupsize, simulation_no) would be arbitrary.
	gen long simulation_no = _n
	save "$anchoring/processed/baseline/anchoring`i'.dta", replace
}

********************************
*** Construct agent-level data

** Append Data
* Stack the per-group-size files (3, 7, ..., maxsize) into one wide dataset.

use "$anchoring/processed/baseline/anchoring3.dta", clear

forvalues i = 7(4)`maxsize' {
	append using "$anchoring/processed/baseline/anchoring`i'.dta"
}

* Move the agent index to the end of each name (agent5dp -> agentdp5) so that the
* reshape below can treat it as the j() suffix.
forvalues i = 0(1)`maxizeminusone' {
	foreach s in ini dp ks cs {
		rename agent`i'`s' agent`s'`i'
	}
	foreach s in dp ks cs {
		rename distancetoagent`i'`s' distancetoagent`s'`i'
	}
}

* One identifier per simulation run. Stored as long because the default float type
* represents integers exactly only up to 16,777,216.
sort groupsize simulation_no
gen long ID = _n
drop simulation_no

** Reshape to long dataset
* NOTE(review): tolong is not a built-in Stata command (presumably the SSC package) - confirm it is installed.
tolong agentini agentdp agentks agentcs distancetoagentdp distancetoagentks distancetoagentcs reliability, i(ID) j(agent)


** Prepare data

* Drop the distance-to-expert columns and every row whose agent index does not
* exist in a group of that size (agents are numbered from 0).
drop distancetotheexpertdp distancetoexpertks distancetotheexpertcs
drop if (agent + 1) > groupsize

label variable ID "Unique simulation identifier"
label variable agent "Agent number"

* Role indicators: the expert is the agent whose index equals the value of expert,
* the first speaker is agent 0.
gen d_expert = (agent == expert)
label variable d_expert "Indicator (0/1) for agent being the expert"
drop expert

gen d_firstspeaker = (agent == 0)
label variable d_firstspeaker "Indicator (0/1) for agent being the first speaker"

* Transitivity flags are strings in the raw data ("true" marks a transitive profile);
* convert them to 0/1 indicators, one per measure.
foreach m in dp ks cs {
	local M = upper("`m'")
	gen d_`m'transitive = (`m'transitive == "true")
	label variable d_`m'transitive "Indicator (0/1) for transitive profile (0/1), `M' measure"
}

drop dptransitive kstransitive cstransitive selfexpert

* agentini agentdp agentks agentcs 

* Flag the agent(s) with the smallest distance within their run. The group minimum
* lives in a temporary variable stored as double: with egen's default storage type
* (which may be float) the equality test could fail for distances held as double.
foreach m in dp ks cs {
	tempvar groupmin
	by ID, sort: egen double `groupmin' = min(distancetoagent`m')
	gen d_min_distance_`m' = (distancetoagent`m' == `groupmin')
	label variable d_min_distance_`m' "Indicator variable (0/1) for agents having the lowest distance in group"
	drop `groupmin'
}


** Rename and label 

* Vote counts per preference ranking (rankings are indexed 0 to 12)
forvalues k = 0/12 {
	label variable numvotes`k' "Number of votes for preference ranking `k', initial profile"
}

label variable freqvoteag1 "Number of votes for speaker one's ranking, initial profile"

* Replace the truncated import names of the expertise variables with readable ones
rename (averexpertiseingroup diffexpertisefirstspeakerandgrou diffexpertisefirstspeakerandexpe diffexpertiseexpertandgroupavera) ///
       (aver_expertise_group diff_expertise_first_and_group diff_expertise_first_and_expert diff_expertise_expert_and_group)

label variable aver_expertise_group "Average expertise in group"
label variable diff_expertise_first_and_group "Difference between expertise of first speaker and group average"
label variable diff_expertise_first_and_expert "Difference between expertise of first speaker and expert"
label variable diff_expertise_expert_and_group "Difference between expertise of expert and group average"

* Proximity measures: one pair per profile (initial, and after deliberation under each measure)
foreach m in ini dp ks cs {
	rename `m'proxtospeak prox_single_peak_`m'
	rename `m'proxspl prox_single_plateau_`m'
	label variable prox_single_peak_`m' "Proximity to single peakness, `m' measure"
	label variable prox_single_plateau_`m' "Proximity to single plateauness, `m' measure"
}

* Agent-level rankings and distances
label variable agentini "Agent's initial ranking before deliberation"
foreach m in dp ks cs {
	label variable agent`m' "Agent's ranking after deliberation, `m' measure"
	label variable distancetoagent`m' "Distance b/w agent's initial ranking and ranking after deliberation, `m' measure" 
}

label variable groupsize "Group size (#)"
label variable reliability "Expertise of agent"

** Save dataset
save "$anchoring/processed/data_agent_level.dta", replace


************************************
*** Construct model-run-level data
use "$anchoring/processed/data_agent_level.dta", clear


** Per-run distance summaries for each measure: distance of the first speaker and of
** the expert, averages over the remaining agents, and the run's maximum, mean and minimum.

* Subset of agents entering each conditional mean, and the matching label stem
local if_first          "d_firstspeaker == 1"
local if_avg_wofirst    "d_firstspeaker == 0"
local if_avg_wofirstexp "d_firstspeaker == 0 & d_expert == 0"
local if_expert         "d_expert == 1"

local lab_first          "Distance for first speaker"
local lab_avg_wofirst    "Average distance in group for all speakers except first"
local lab_avg_wofirstexp "Average distance in group for all speakers except first and expert"
local lab_expert         "Distance for expert"

tempvar subset

foreach x in dp ks cs {

	* Conditional means: copy the distance for the selected agents only, then
	* spread its within-run mean over every row of the run.
	foreach s in first avg_wofirst avg_wofirstexp expert {
		gen `subset' = distancetoagent`x' if `if_`s''
		by ID, sort: egen distance_`s'_`x' = mean(`subset')
		label variable distance_`s'_`x' "`lab_`s'', `x' measure"
		drop `subset'
	}

	* Unconditional within-run summaries
	by ID, sort: egen max_distance_`x' = max(distancetoagent`x')
	label variable max_distance_`x' "Maximum distance in group, `x' measure"

	by ID, sort: egen avg_distance_`x' = mean(distancetoagent`x')
	label variable avg_distance_`x' "Average distance in group, `x' measure"

	by ID, sort: egen min_distance_`x' = min(distancetoagent`x')
	label variable min_distance_`x' "Minimum distance in group, `x' measure"

}


** Generate averages for non-first speakers and non-experts by groupsize
* For each measure: the mean of the lowest-distance indicator over all agent rows of a
* given groupsize, restricted to the stated subset of agents.
* The labels are worded to fit within Stata's 80-character limit for variable labels;
* the longer wording used previously exceeded that limit.

* Non first speakers

foreach x in dp ks cs {
	gen help = d_min_distance_`x' if d_firstspeaker == 0
	bysort groupsize: egen d_min_distance_nonfirst_`x' = mean(help)
	label var d_min_distance_nonfirst_`x' "Avg. share of non-first speakers w/ lowest distance, by groupsize"
	drop help
}


* Non first speakers/non-experts

foreach x in dp ks cs {
	gen help = d_min_distance_`x' if d_firstspeaker == 0 & d_expert == 0
	bysort groupsize: egen d_min_distance_nonfirstexp_`x' = mean(help)
	label var d_min_distance_nonfirstexp_`x' "Avg. share of non-first, non-expert speakers w/ lowest distance, by groupsize"
	drop help
}
  
 
** Keep only model-run-level variables
* Everything retained here was either carried over from the wide data or computed
* per run / per groupsize above.
keep ID groupsize numvotes* freqvoteag1 ///
	aver_expertise_group diff_expertise_first_and_group diff_expertise_first_and_expert ///
	*prox* d_*transitive ///
	distance_first_* distance_expert_* distance_avg_wofirst* ///
	max_distance_* min_distance_* avg_distance_* ///
	d_min_distance_nonfirst*

** Keep only one observation for each run
bysort ID: keep if _n == 1

** Save dataset
* NOTE(review): the file name carries a doubled ".dta.dta" extension - confirm that
* the scripts reading this file expect exactly this name before changing it.
save "$anchoring/processed/data_model_run_level.dta.dta", replace





******************
*** Read in data (immodest agents)

* Largest group size to process; files exist for sizes 3, 7, 11, ... up to this value
local maxsize = 99
* Index of the last agent column (agents are numbered from 0); used by the rename loop further down
local maxizeminusone = `maxsize' - 1

** Simulation characteristics: Group size, randomized reliabilities, non-strict voting, number of simulations
** Each raw CSV is tagged with its group size and a within-file row counter and stored as .dta.

forvalues i = 3(4)`maxsize' {
	import delimited "$anchoring/data/immodest/anchoring`i'.csv", clear
	gen int groupsize = `i'
	*gen str10 reliability = "randomized"
	*gen str10 voting = "nonstrict"
	* Stored as long: an int tops out at 32,740, so a longer file would get missing
	* values here and the later sort on (groupsize, simulation_no) would be arbitrary.
	gen long simulation_no = _n
	save "$anchoring/processed/immodest/anchoring`i'.dta", replace
}

********************************
*** Construct agent-level data

** Append Data
* Stack the per-group-size files (3, 7, ..., maxsize) into one wide dataset.

use "$anchoring/processed/immodest/anchoring3.dta", clear

forvalues i = 7(4)`maxsize' {
	append using "$anchoring/processed/immodest/anchoring`i'.dta"
}

* Move the agent index to the end of each name (agent5dp -> agentdp5) so that the
* reshape below can treat it as the j() suffix.
forvalues i = 0(1)`maxizeminusone' {
	foreach s in ini dp ks cs {
		rename agent`i'`s' agent`s'`i'
	}
	foreach s in dp ks cs {
		rename distancetoagent`i'`s' distancetoagent`s'`i'
	}
}

* One identifier per simulation run. Stored as long because the default float type
* represents integers exactly only up to 16,777,216.
sort groupsize simulation_no
gen long ID = _n
drop simulation_no

** Reshape to long dataset
* NOTE(review): tolong is not a built-in Stata command (presumably the SSC package) - confirm it is installed.
tolong agentini agentdp agentks agentcs distancetoagentdp distancetoagentks distancetoagentcs reliability, i(ID) j(agent)

** Prepare data

* Drop the distance-to-expert columns and every row whose agent index does not
* exist in a group of that size (agents are numbered from 0).
drop distancetotheexpertdp distancetoexpertks distancetotheexpertcs
drop if (agent + 1) > groupsize

label variable ID "Unique simulation identifier"
label variable agent "Agent number"

* Role indicators: the expert is the agent whose index equals the value of expert,
* the first speaker is agent 0.
gen d_expert = (agent == expert)
label variable d_expert "Indicator (0/1) for agent being the expert"
drop expert

gen d_firstspeaker = (agent == 0)
label variable d_firstspeaker "Indicator (0/1) for agent being the first speaker"

* Transitivity flags are strings in the raw data ("true" marks a transitive profile);
* convert them to 0/1 indicators, one per measure.
foreach m in dp ks cs {
	local M = upper("`m'")
	gen d_`m'transitive = (`m'transitive == "true")
	label variable d_`m'transitive "Indicator (0/1) for transitive profile (0/1), `M' measure"
}

drop dptransitive kstransitive cstransitive 

*selfexpert agentini agentdp agentks agentcs distancetoagentdp distancetoagentks distancetoagentcs

* Flag the agent(s) with the smallest distance within their run. The group minimum
* lives in a temporary variable stored as double: with egen's default storage type
* (which may be float) the equality test could fail for distances held as double.
foreach m in dp ks cs {
	tempvar groupmin
	by ID, sort: egen double `groupmin' = min(distancetoagent`m')
	gen d_min_distance_`m' = (distancetoagent`m' == `groupmin')
	label variable d_min_distance_`m' "Indicator variable (0/1) for agents having the lowest distance in group"
	drop `groupmin'
}


** Rename and label 

* Vote counts per preference ranking (rankings are indexed 0 to 12)
forvalues k = 0/12 {
	label variable numvotes`k' "Number of votes for preference ranking `k', initial profile"
}

label variable freqvoteag1 "Number of votes for speaker one's ranking, initial profile"

* Replace the truncated import names of the expertise variables with readable ones
rename (averexpertiseingroup diffexpertisefirstspeakerandgrou diffexpertisefirstspeakerandexpe diffexpertiseexpertandgroupavera) ///
       (aver_expertise_group diff_expertise_first_and_group diff_expertise_first_and_expert diff_expertise_expert_and_group)

label variable aver_expertise_group "Average expertise in group"
label variable diff_expertise_first_and_group "Difference between expertise of first speaker and group average"
label variable diff_expertise_first_and_expert "Difference between expertise of first speaker and expert"
label variable diff_expertise_expert_and_group "Difference between expertise of expert and group average"

* Proximity measures: one pair per profile (initial, and after deliberation under each measure)
foreach m in ini dp ks cs {
	rename `m'proxtospeak prox_single_peak_`m'
	rename `m'proxspl prox_single_plateau_`m'
	label variable prox_single_peak_`m' "Proximity to single peakness, `m' measure"
	label variable prox_single_plateau_`m' "Proximity to single plateauness, `m' measure"
}

* Agent-level distances
foreach m in dp ks cs {
	label variable distancetoagent`m' "Distance b/w agent's initial ranking and ranking after deliberation, `m' measure" 
}

label variable reliability "Expertise of agent"
label variable groupsize "Group size (#)"


** Save dataset
save "$anchoring/processed/data_agent_level_immodest.dta", replace


*** EOF